library(tidyverse)
library(parlitools)
library(ggthemes)
library(tidylog)

# GET CROSS-SECTIONAL CLIMATE TWEETS DATA
# load() restores saved objects into the workspace under their stored names.
# MPtweets is used below but never created in this script, so it is presumably
# supplied by this file -- TODO confirm.
load("data/analysis/MPtweetsv2.Rdata")
# Pattern later handed to grepl() to flag climate-related text.
climterms <- readRDS("data/output/climgenterms.rds")

# Total number of tweets per MP: one row per distinct combination of the
# grouping columns, so the MP-level attributes are carried along for the
# later merges.
MPtotals <- MPtweets %>%
  group_by(username, party_value, full_name_value,
           gender_value, about, constituency_name, gss_code) %>%
  # n() counts rows per group; cast to double to keep the column type that
  # summing a column of 1s produced.
  summarise(sum_tweets = as.numeric(n())) %>%
  # summarise() only peels off the last grouping column; drop the remaining
  # groups so the table is not left silently grouped.
  ungroup()

# Climate tweets per MP and their share (in %) of that MP's total tweets.
# NOTE(review): grepl() takes a single pattern, so this assumes climterms is
# one regular-expression string -- confirm.
MPpctclimts <- MPtweets %>%
  filter(grepl(climterms, tweet, ignore.case = TRUE)) %>%
  group_by(username) %>%
  # Row count per MP, kept as double like the original sum of 1s.
  summarise(sum_ctweets = as.numeric(n())) %>%
  # Full join keeps MPs that have no matching tweets; their count arrives as
  # NA and is set to 0 in the next step.
  full_join(MPtotals, by = "username") %>%
  mutate(sum_ctweets = ifelse(is.na(sum_ctweets), 0, sum_ctweets),
         pctclim = (sum_ctweets / sum_tweets) * 100)

# GET CROSS-SECTIONAL SPEECHES DATA
speeches <- read_csv("data/output/speeches.csv")

# Total speeches per MP, keyed on the Hansard "about" ID used for merging.
MPtotalspchs <- speeches %>%
  group_by(about) %>%
  # Number of rows per ID, stored as double.
  summarise(sum_spchs = as.numeric(n()))

# Percentage of each MP's speeches that match the climate pattern, keyed on
# the Hansard "about" ID used for merging.
# NOTE(review): grepl() takes a single pattern, so this assumes climterms is
# one regular-expression string -- confirm.
MPpctclimhspchs <- speeches %>%
  filter(grepl(climterms, speech_text, ignore.case = TRUE)) %>%
  group_by(about) %>%
  # Row count per ID, kept as double like the original sum of 1s.
  summarise(sum_cspchs = as.numeric(n())) %>%
  # Full join keeps IDs with no matching speeches; their count arrives as NA
  # and is set to 0 in the next step.
  full_join(MPtotalspchs, by = "about") %>%
  mutate(sum_cspchs = ifelse(is.na(sum_cspchs), 0, sum_cspchs),
         pctclimh = (sum_cspchs / sum_spchs) * 100,
         # Converted to double ahead of the join on "about" with the tweets
         # table.
         about = as.double(about)) %>%
  dplyr::select(about, sum_spchs, sum_cspchs, pctclimh)

#GET CONSTITUENCY GSS CODES TAKEN FROM parlitools PACKAGE
# load() restores objects under their saved names. Which objects this file
# provides, and whether the rest of the script uses them, is not visible
# here -- TODO confirm this load is still needed.
load("data/output/cons_codes.RData")

#GET CROSS-SECTIONAL FFF DATA
# Merged onto the MP table by gss_code further down; the script then reads a
# sum_FFF_events column from the merged result, presumably supplied by this
# file -- verify.
fff_events <- read_csv("data/output/gss_FFF_events.csv")

# MERGE ACROSS DATA SOURCES
# Combine the per-MP tweet and speech percentages on the Hansard "about" ID.
# Rows without speech figures (Rebecca Harris and Mike Wood, per the original
# note) get 0 instead of NA in the three speech columns.
MPpctclimspchts <- MPpctclimts %>%
  full_join(MPpctclimhspchs, by = "about") %>%
  replace_na(list(sum_spchs = 0, sum_cspchs = 0, pctclimh = 0))

# Merge protest events onto the MP table by constituency GSS code.
# all.x = TRUE keeps every MP row even when there is no matching events row.
MPpctclimspchtsmf <- merge(MPpctclimspchts, fff_events,
                           by = "gss_code", all.x = TRUE)
# MP rows without a matching events row get an event count of zero.
MPpctclimspchtsmf$sum_FFF_events[is.na(MPpctclimspchtsmf$sum_FFF_events)] <- 0


# Final cross-sectional table: pick the output columns in their published
# order, renaming four of them on the way.
MP_cs_all <- MPpctclimspchtsmf %>%
  dplyr::select(about, username,
                full_name = full_name_value,
                gss_code, constituency_name, party_value,
                gender = gender_value,
                sum_tweets, sum_ctweets, sum_spchs, sum_cspchs,
                pctclim_twts = pctclim,
                pctclim_spchs = pctclimh,
                sum_FFF_events)

# Persist the analysis dataset.
write_csv(MP_cs_all, "data/analysis/MP_cs_all.csv")
